// VoterSurveyEthnicity
* Close any log left open by an earlier (possibly aborted) run; otherwise
* -log using- stops with a "log file already open" error on re-run.
* -capture- makes this a no-op when no log is open.
capture log close
log using VoterSurveyEthnicity.log, replace

* Data source: Democracy Fund Voter Study Group. VIEWS OF THE ELECTORATE RESEARCH SURVEY. Washington, D.C. https://www.voterstudygroup.org/.  Date accessed: March 09, 2025.
* -clear- allows the script to be re-run while a modified dataset is still in
* memory; without it -use- refuses to load and the script stops here.
use "C:\Users\sbstjp\OneDrive - Cardiff University\voter_panel.dta", clear

// Create social justice scale
* Set response codes that this script treats as invalid to missing.

* Feeling thermometer: values above 100 are treated as missing.
replace ft_blm_2020Sep = . if ft_blm_2020Sep > 100

* For these four items, every code above 2 is treated as missing.
foreach item in reparations_2020Sep defundpolice_2020Sep police_threat_2020Sep usa_founders_2020Sep {
    replace `item' = . if `item' > 2
}

* For this item only code 9 is set to missing.
* NOTE(review): other non-substantive codes (if any exist) are left as-is - confirm against the codebook.
replace internetharass_dem_2020Sep = . if internetharass_dem_2020Sep == 9

* Reverse-code three items so that higher values indicate stronger social justice values.
* Each new variable is named r<item> and equals (observed maximum of item) - item.
foreach item in reparations_2020Sep defundpolice_2020Sep usa_founders_2020Sep {
    quietly summarize `item'
    local item_max = r(max)
    generate r`item' = `item_max' - `item'
}

* Rescale every scale item linearly onto the interval [1, 2] (min-max rescaling);
* the result for each item is stored as s<item>.
* Starting the range at 1 guarantees that no observed response is rescaled to 0.
local scale_items ft_blm_2020Sep rreparations_2020Sep rdefundpolice_2020Sep rusa_founders_2020Sep police_threat_2020Sep internetharass_dem_2020Sep
tempname lo hi
foreach item of local scale_items {
    summarize `item'
    * Keep the observed range in full precision for the rescaling formula.
    scalar `lo' = r(min)
    scalar `hi' = r(max)
    generate s`item' = 1 + (`item' - `lo') / (`hi' - `lo')
}

*At this stage, the scale has a Cronbach's alpha of 0.83

* Build the scale score as the mean of the items each respondent actually answered.
*
* The -egen- row functions ignore missing values, so the rescaled items do not
* have to be overwritten with 0 as a stand-in for "no answer". The s* items
* therefore keep their missing values and stay usable for item-level analysis.
*
* Variables created:
*   total_scoreSJV   - sum of the non-missing items (0 when no item was answered)
*   count_nonzeroSJV - number of non-missing items; the name is kept for
*                      compatibility with anything that already refers to it
*   SocJusValues     - mean of the non-missing items; missing when no item was answered
local sjv_items sft_blm_2020Sep srreparations_2020Sep srdefundpolice_2020Sep srusa_founders_2020Sep spolice_threat_2020Sep sinternetharass_dem_2020Sep

egen total_scoreSJV   = rowtotal(`sjv_items')
egen count_nonzeroSJV = rownonmiss(`sjv_items')
egen SocJusValues     = rowmean(`sjv_items')

// Demographics
* Set codes that this script treats as invalid to missing.
replace faminc_2020Sep = . if faminc_2020Sep > 20
replace ideo5_2020Sep = . if ideo5_2020Sep == 6

* Age in 2020, derived from year of birth.
generate age = 2020 - birthyr_2020Sep

* The gender item is used as-is under a new name; no values are changed here.
* NOTE(review): the new name implies the higher code means female - confirm the
* coding (and any non-substantive codes) against the codebook.
rename gender_2020Sep FemaleGender

* Indicator (0/1) variables. Each is built in a single expression:
* 1 for the target category, 0 for the listed comparison categories,
* and missing for any other value (including missing source values).

* Graduate: education codes 5-6 versus codes 1-4.
generate Graduate = cond(inlist(educ_2020Sep, 5, 6), 1, cond(inrange(educ_2020Sep, 1, 4), 0, .))

* Race indicators: the target race code versus the remaining codes in 1-8.
generate White = cond(race_2020Sep == 1, 1, cond(inrange(race_2020Sep, 2, 8), 0, .))

generate Black = cond(race_2020Sep == 2, 1, cond(race_2020Sep == 1 | inrange(race_2020Sep, 3, 8), 0, .))

generate Hispanic = cond(race_2020Sep == 3, 1, cond(inlist(race_2020Sep, 1, 2) | inrange(race_2020Sep, 4, 8), 0, .))

generate Asian = cond(race_2020Sep == 4, 1, cond(inrange(race_2020Sep, 1, 3) | inrange(race_2020Sep, 5, 8), 0, .))

// Standardizations
* Convert age and family income to z-scores: each new variable is the original
* minus its mean, divided by its standard deviation.
* No survey weights are applied at this step; the capitalised versions
* (Age, Income) are the ones entered in the regressions below.
egen Age = std(age)
egen Income = std(faminc_2020Sep)

// Regressions
* One probability-weighted OLS model per ethnic-group indicator. Every model
* uses the same controls (Age, FemaleGender, Graduate, Income) and swaps in a
* single group dummy, so each coefficient compares that group with all other
* respondents.
* -eststo- and -esttab- come from the community-contributed estout package.

* Drop estimates left over from earlier work or an aborted run, so the table
* printed below contains only the four models fitted here.
eststo clear

foreach group in White Black Hispanic Asian {
    regress SocJusValues Age FemaleGender Graduate Income `group' [pweight=weight_genpop_2020Sep], robust
    eststo
}

* Print the four stored models side by side, then release them.
esttab
eststo clear

// Correlations
* Weighted pairwise correlations between the scale score and the four
* ethnic-group indicators; -sig- adds the significance level of each entry.
local corr_vars SocJusValues White Black Hispanic Asian
pwcorr `corr_vars' [aweight=weight_genpop_2020Sep], sig

* End of analysis: close the log opened at the top of the script.
log close